---
title: "List experiment"
author: "Kota Suechika"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output: html_document
---

```{r setup, include=FALSE}
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE, fig.width = 10, fig.height = 5)
library(haven)
require(memisc)
require(stringi)
library(stargazer)
library(coefplot)
library(interplot)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(stargazer)
library(stringr)
library(tidyverse)
library(ggplot2)
library(psych)
library(dplyr)
library(modelsummary)
library(makedummies)
library(ggeffects)
library(estimatr)
library(ggpubr)
library(BalanceR)

theme_set(theme_sjplot())

dat <- read_csv("data/Lebanon.csv")
dat <- dat[-1,]

# List 1
dat <- dat |>
  mutate(treatment = `Q12-t_1`) |>
  mutate(control = `Q12-c_1`)|>
  mutate(List_experiment_Hezbollah = case_when(control >= 0 ~  "Control",
                                               treatment >= 0 ~ "Treatment")) |>
  mutate(Treatment = ifelse(List_experiment_Hezbollah == "Treatment", "1", "0"))

dat$`Q12-c_1`[is.na(dat$`Q12-c_1`)] <- 0
dat$`Q12-t_1`[is.na(dat$`Q12-t_1`)] <- 0

dat <- dat |>
  mutate(Hezbollah_experiment = `Q12-c_1` + `Q12-t_1`)

# List 2
dat <- dat |>
  mutate(treatment2 = `Q13-t_1`) |>
  mutate(control2 = `Q13-c_1`)|>
  mutate(List_experiment_normalization = case_when(control2 >= 0 ~  "Control",
                                                   treatment2 >= 0 ~ "Treatment")) |>
  mutate(Treatment2 = ifelse(List_experiment_normalization == "Treatment", 1, 0))

dat$`Q13-c_1`[is.na(dat$`Q13-c_1`)] <- 0
dat$`Q13-t_1`[is.na(dat$`Q13-t_1`)] <- 0

dat <- dat |>
  mutate(Normalization_experiment = `Q13-c_1` + `Q13-t_1`)

# unexpected event
dat$RecordedDate <- as.Date(stri_match_first_regex(dat$RecordedDate, "\\d{4}-\\d{2}-\\d{2}"))

dat$ceasefire <- NA
dat$ceasefire[dat$RecordedDate <= as.Date("2025-01-16")] <- "Pre"
dat$ceasefire[as.Date("2025-01-17") <= dat$RecordedDate] <- "Post"
dat$ceasefire <- factor(dat$ceasefire)

dat <- dat |>
  mutate(Gender = D1,
         Age = D2,
         Education = D3,
         Income = D5)

# Ceasefire: unexpected event during survey
dat$RecordedDate <- as.Date(stri_match_first_regex(dat$RecordedDate, "\\d{4}-\\d{2}-\\d{2}"))

dat$ceasefire <- NA
dat$ceasefire[dat$RecordedDate <= as.Date("2025-01-16")] <- "Pre"
dat$ceasefire[as.Date("2025-01-17") <= dat$RecordedDate] <- "Post"
dat$ceasefire <- factor(dat$ceasefire)

# Shia party supporter
dat <- dat |> 
  mutate(Shia_party_supporter = (Q4_2 + Q4_3)/2) |>
  mutate(Shia_party_supporter = ifelse(Shia_party_supporter >= 7, "Shia-party supporter", "Others"))
dat$Shia_party_supporter <- as.factor(dat$Shia_party_supporter)

# Patronage
dat <- dat |> 
  mutate(Patronage = case_when(Q4 >= 3 ~ "High",
                               Q4 <= 2 ~ "Low"))
dat$Patronage <- as.factor(dat$Patronage)

```

# balance check: List 1
```{r}
balance <- BalanceR(data = dat, 
                    group = List_experiment_Hezbollah,
                    cov = c(Gender, Age, Education, Income))
print(balance, digits = 3)
summary(balance, digits = 5)
plot(balance, point.size = 5, text.size = 18)

balance_LB <- plot(balance, point.size = 5, text.size = 18) +
  labs(title = "List experiment: balance check")
ggsave(filename = "result/balance_list_lb.png",
       plot = balance_LB,
       width = 7,
       height = 4,
       units = "in", 
       dpi = 600)
```

# balance check: List 1 refined
```{r}

balance <- BalanceR(data = dat, 
                    group = List_experiment_Hezbollah,
                    cov = c(Gender, Age, Education, Income))
p_base <- plot(balance, point.size = 4, text.size = 12)

balance_LB <- p_base +
  geom_vline(xintercept = c(-0.1, 0.1), linetype = "dashed", color = "gray50") +
  geom_vline(xintercept = 0, color = "black") +
  labs(title = "",
       subtitle = "Standardized Mean Difference",
       x = "Standardized Bias",
       y = "") +
  theme_bw(base_family = "sans", base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0),    
    plot.subtitle = element_text(color = "gray40"),         
    axis.text.y = element_text(color = "black", size = 12),  
    axis.text.x = element_text(color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray90", linetype = "dotted"),
    panel.border = element_rect(color = "black", fill = NA)
  )

print(balance_LB)

ggsave(filename = "result/balance_list_lb.png",
       plot = balance_LB,
       width = 8,
       height = 5,
       units = "in", 
       dpi = 600)
```

# balance check: List 2
```{r}
balance <- BalanceR(data = dat, 
                    group = List_experiment_normalization,
                    cov = c(Gender, Age, Education, Income))
print(balance, digits = 3)
summary(balance, digits = 5)
plot(balance, point.size = 5, text.size = 18)
```

# overview dependent variavles: List 1
```{r}
hist(dat$Hezbollah_experiment)
dat |> 
  filter(!is.na(Hezbollah_experiment)) |> 
  ggplot() +
  geom_histogram(aes(x = Hezbollah_experiment, bins = 5)) +
  labs(x = "Experiment", y = "frequency")
```

# List1 results
```{r}
mean(dat$treatment, na.rm = TRUE) - mean(dat$control, na.rm = TRUE)
t.test(dat$control, dat$treatment)

list1 <- dat |> 
  summarise(control_mean = mean(control, na.rm = TRUE), control_SE = sd(dat$control, na.rm = TRUE) / sqrt(length(dat$control)),
            treatment_mean = mean(treatment, na.rm = TRUE), treatment_SE = sd(dat$treatment, na.rm = TRUE) / sqrt(length(dat$treatment)))

df1 <- data.frame(
  Experiment = c("Control", "Treatment"),
  Mean = c(list1$control_mean, list1$treatment_mean),
  SE = c(list1$control_SE, list1$treatment_SE)
)

p1 <- ggplot(df1, aes(x = Experiment, y = Mean, group = Experiment)) +
  geom_point(stat = "identity", color = "black") +
  geom_errorbar(aes(ymax = Mean+SE, ymin = Mean-SE), width = 0.2, position = position_dodge(.9)) +
  labs(title = "List Experiment") +
  theme_bw()

p1
ggsave(filename = "result/list1_lebanon.png",
       plot = p1,
       width = 4,
       height = 4,
       units = "in", 
       dpi = 600)
```

# List 1 results refined
```{r}
p1_grid <- ggplot(df1, aes(x = Experiment, y = Mean)) +
  geom_errorbar(aes(ymax = Mean + SE, ymin = Mean - SE), 
                width = 0.1, size = 0.8, color = "black") +
  geom_point(stat = "identity", size = 5, color = "black") +
  labs(title = "",
       x = "", 
       y = "Mean Item Count") +
  theme_bw(base_size = 16) + 
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 18),
    axis.text = element_text(color = "black"),
    panel.grid.major.x = element_blank(), 
    panel.grid.minor.x = element_blank(), 
    panel.grid.major.y = element_line(color = "gray85"), 
    panel.grid.minor.y = element_blank(), 
    panel.border = element_blank(),            
    axis.line = element_line(color = "black") 
  )

print(p1_grid)

ggsave(filename = "result/list1_Hezbollah_disapproval_grid.png",
       plot = p1_grid, width = 5, height = 5, dpi = 600)
```

# overview dependent variavles: List 2
```{r}
dat |> 
  filter(!is.na(Normalization_experiment)) |> 
  ggplot() +
  geom_histogram(aes(x = Normalization_experiment, bins = 5)) +
  labs(x = "Experiment", y = "frequency")
```

# regression: List 1
```{r}
reg01 <- lm(formula = Hezbollah_experiment ~ Treatment, data = dat)
summary(reg01)

reg02 <- lm(formula = Hezbollah_experiment ~ Treatment +
              ceasefire +
              Gender + Age + Education + Income + Shia_party_supporter + Patronage, 
            data = dat)
summary(reg02)

reg03 <- lm(formula = Hezbollah_experiment ~ Treatment * ceasefire +
              Gender + Age + Education + Income + Shia_party_supporter + Patronage, 
            data = dat)
summary(reg03)

reg04 <- lm(formula = Hezbollah_experiment ~ Treatment * (Gender + Age + Education + Income + Shia_party_supporter + Patronage + ceasefire),
            data = dat)
summary(reg04)

install.packages(c("stargazer","sandwich","lmtest"))
library(stargazer)
library(sandwich)
library(lmtest)

if (!dir.exists("result")) dir.create("result", recursive = TRUE)

se1 <- sqrt(diag(vcovHC(reg01, type = "HC2")))
se2 <- sqrt(diag(vcovHC(reg02, type = "HC2")))
se3 <- sqrt(diag(vcovHC(reg03, type = "HC2")))
se4 <- sqrt(diag(vcovHC(reg04, type = "HC2")))

coef_labels <- c(
  "Treatment",
  "Ceasefire (Pre)",
  "Gender",
  "Age",
  "Education",
  "Income",
  "Shia party supporter",
  "Patronage",
  "Treatment × Ceasefire (Pre)"
)

stargazer(reg01, reg02, reg03, reg04,
  type = "html",
  out = "result/List_OLS.html",
  title = "OLS Regression Results (List Experiment Outcome)",
  dep.var.labels = "Hezbollah Experiment",
  column.labels = c("M1","M2","M3","M4"),
  se = list(se1, se2, se3, se4),
  covariate.labels = coef_labels,   
  omit.stat = c("f"),             
  notes = "HC2 robust SE in parentheses.",
  notes.align = "l",
  digits = 3
)

stargazer(reg01, reg02, reg03, reg04,
  type = "html",
  out = "result/List_OLS.html",
  title = "Table A.1. OLS Regression Results for the List Experiment Outcome (“Hezbollah Experiment”)",
  dep.var.labels = "Hezbollah Experiment (List Outcome)",
  column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
  se = list(se1, se2, se3, se4),
  covariate.labels = c("Treatment",
                       "Ceasefire (Pre)",
                       "Gender",
                       "Age",
                       "Education",
                       "Income",
                       "Shia party supporter",
                       "Patronage",
                       "Treatment × Ceasefire (Pre)",
                       "Treatment × Gender",
                       "Treatment × Age",
                       "Treatment × Education",
                       "Treatment × Income",
                       "Treatment × Shia party supporter",
                       "Treatment × Patronage",
                       "Treatment × Ceasefire (Pre)"
                       ),
  omit.stat = c("f"),
  notes = "Notes: Models 1–4 present OLS regressions predicting the number of endorsed items in the list experiment measuring latent support for Hezbollah. Model 1 includes only the treatment assignment. Model 2 adds demographic and political covariates (gender, age, education, income, prior support for Shia political parties, and patronage dependence). Model 3 further includes the ceasefire indicator and an interaction between treatment and ceasefire exposure. Model 4 incorporates interaction terms between treatment assignment and all covariates, capturing subgroup heterogeneity in the treatment effect and following the logic of Blair and Imai (2012). Robust (HC2) standard errors are reported in parentheses. The dependent variable is the total number of items endorsed in the list experiment (0–5). All models use weighted data and are estimated on the full sample (N = 823). p < .10 †, p < .05 *, p < .01 **, p < .001 ***.",
  notes.align = "l",
  digits = 3
)
```

# Marginal effect: List 1
```{r}
plot_model(reg01, type = "pred", terms = c("Treatment"),
           axis.labels = "Treatment", title = "")

int_reg01 <- interplot(m = reg03, var1 = "Treatment1", var2 = "ceasefire", point = TRUE) +
  labs(x = "Ceasfire",
       y = "Effect of treatment") +
  ggtitle("Marg. eff. of Ceasefire") +
  geom_hline(yintercept = 0, linetype = "dashed")
print(int_reg01)
```

# regression: List 2
```{r}
reg_L201 <- lm(formula = Normalization_experiment ~ Treatment2, data = dat)
summary(reg_L201)

reg_L02 <- lm(formula = Normalization_experiment ~ Treatment2 +
              ceasefire +
              Gender + Age + Education + Income, 
            data = dat)
summary(reg_L02)

reg_L03 <- lm(formula = Normalization_experiment ~ Treatment2 * ceasefire +
              Gender + Age + Education + Income, 
            data = dat)
summary(reg_L03)


```

# Marginal effect: List 2
```{r}
int_reg_L01 <- interplot(m = reg_L03, var1 = "Treatment2", var2 = "ceasefire", point = TRUE) +
  labs(x = "Ceasfire",
       y = "Effect of treatment") +
  ggtitle("Marg. eff. of Ceasefire") +
  geom_hline(yintercept = 0, linetype = "dashed")
print(int_reg_L01)
```

#Appendix: List 1 = Hezbollah support
#Appendix: List Experiment diagnostics (distribution & floor/ceiling)
```{r}
# Control: Q12-c_1, Treatment: Q12-t_1
list1_arm <- dat |>
  dplyr::transmute(
    arm = List_experiment_Hezbollah,                
    y   = dplyr::if_else(arm == "Control", `Q12-c_1`, `Q12-t_1`)
  ) |>
  dplyr::filter(!is.na(arm), !is.na(y))

# Basic statistics and floor/ceiling（％）
list1_diag <- list1_arm |>
  dplyr::group_by(arm) |>
  dplyr::summarise(
    n          = dplyr::n(),
    mean       = mean(y),
    sd         = sd(y),
    min_obs    = min(y),
    max_obs    = max(y),
    floor_pc   = 100 * mean(y == 0),
    ceiling_pc = {
      mx <- max(y)
      100 * mean(y == mx)
    },
    .groups = "drop"
  )

knitr::kable(
  list1_diag,
  caption = "List Experiment diagnostics: arm-wise distribution and floor/ceiling (%)",
  digits = 2
)

k_min <- min(list1_arm$y)
k_max <- max(list1_arm$y)

p_hist <- ggplot(list1_arm, aes(x = y, fill = arm)) +
  geom_histogram(position = "identity", alpha = 0.5, bins = (k_max - k_min + 1)) +
  scale_x_continuous(breaks = seq(k_min, k_max, by = 1)) +
  labs(
    title = "",
    x = "Number of endorsed items",
    y = "Count",
    fill = "Arm"
  ) +
  theme_bw() +
  theme(legend.position = "top")

p_hist
ggsave("result/list1_distribution_by_arm.png", p_hist, width = 6.5, height = 4.2, dpi = 600)
```

# Appendix: Difference-in-means with 95% CI
```{r}

dat$`Q12-c_1` <- as.numeric(as.character(dat$`Q12-c_1`))
dat$`Q12-t_1` <- as.numeric(as.character(dat$`Q12-t_1`))

y_control <- dat$`Q12-c_1`[dat$List_experiment_Hezbollah == "Control"]
y_treat   <- dat$`Q12-t_1`[dat$List_experiment_Hezbollah == "Treatment"]
y_control <- y_control[!is.na(y_control)]
y_treat   <- y_treat[!is.na(y_treat)]

t_res <- t.test(y_control, y_treat) 
t_res


list1_diff_tab <- data.frame(
  stat  = c("Mean (Control)", "Mean (Treatment)", "Difference (T - C)",
            "95% CI (low)", "95% CI (high)", "p-value"),
  value = c(
    mean(y_control, na.rm = TRUE),
    mean(y_treat,   na.rm = TRUE),
    mean(y_treat, na.rm = TRUE) - mean(y_control, na.rm = TRUE),
    t_res$conf.int[1],
    t_res$conf.int[2],
    t_res$p.value
  )
)

knitr::kable(
  list1_diff_tab,
  caption = "Difference-in-means for List Experiment",
  digits = 3
)

dm_tab <- list1_arm |>
  dplyr::group_by(arm) |>
  dplyr::summarise(
    mean = mean(y),
    se   = sd(y)/sqrt(dplyr::n()),
    .groups = "drop"
  )

p_dm <- ggplot(dm_tab, aes(x = arm, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = mean - 1.96 * se, ymax = mean + 1.96 * se), width = 0.15) +
  labs(
    title = "",
    x = NULL, y = "Mean endorsed items"
  ) +
  theme_bw()

p_dm
ggsave("result/list1_diff_in_means.png", p_dm, width = 4.2, height = 3.6, dpi = 600)
```

# Appendix Figure: Distribution overlay
```{r}

# numeric
dat$`Q12-c_1` <- suppressWarnings(as.numeric(dat$`Q12-c_1`))
dat$`Q12-t_1` <- suppressWarnings(as.numeric(dat$`Q12-t_1`))

# arms
dat$List_experiment_Hezbollah <- factor(dat$List_experiment_Hezbollah,
                                        levels=c("Control","Treatment"))

# NA
list1_arm <- dat |>
  dplyr::filter(List_experiment_Hezbollah %in% c("Control","Treatment")) |>
  dplyr::mutate(
    y_raw = dplyr::case_when(
      List_experiment_Hezbollah == "Control"   ~ `Q12-c_1`,
      List_experiment_Hezbollah == "Treatment" ~ `Q12-t_1`
    )
  ) |>
  dplyr::filter(!is.na(y_raw) & y_raw %in% 0:5) |>
  dplyr::mutate(y = as.integer(y_raw))

# deleted rows
n_removed <- nrow(dat) - nrow(list1_arm)
message("Rows removed before plotting: ", n_removed)


list1_arm <- dat %>%
  mutate(
    y = case_when(
      List_experiment_Hezbollah == "Control" ~ as.numeric(`Q12-c_1`),
      List_experiment_Hezbollah == "Treatment" ~ as.numeric(`Q12-t_1`)
    )
  ) %>%
  filter(!is.na(y))

# plot
ggplot(list1_arm, aes(x = y, fill = List_experiment_Hezbollah)) +
  geom_histogram(position = "identity", alpha = 0.45, bins = 6, color = "white", boundary = 0) +
  scale_x_continuous(breaks = 0:5, limits = c(-0.5, 5.5)) +
  labs(
    title = "Distribution of Endorsed Items in the List Experiment",
    subtitle = "Control and treatment groups show no floor or ceiling effects",
    x = "Number of endorsed items (0–5)",
    y = "Count",
    fill = "Experimental Arm"
  ) +
  scale_fill_manual(values = c("Control" = "#F4A582", "Treatment" = "#92C5DE")) +
  theme_bw(base_size = 13) +
  theme(legend.position = "top")

ggsave("result/Appendix_List1_distribution_overlay.png", width = 6, height = 4, dpi = 600)
```
#Appendix Figure: Difference-in-Means plot ---
```{r}
dm_tab <- list1_arm %>%
  group_by(List_experiment_Hezbollah) %>%
  summarise(
    mean = mean(y),
    se = sd(y) / sqrt(n()),
    .groups = "drop"
  )

ggplot(dm_tab, aes(x = List_experiment_Hezbollah, y = mean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = mean - 1.96*se, ymax = mean + 1.96*se),
                width = 0.15) +
  geom_hline(yintercept = mean(dm_tab$mean), linetype = "dashed",
             color = "grey50") +
  labs(
    title = "Mean Number of Endorsed Items by Experimental Arm (List Experiment)",
    x = "Experimental Arm",
    y = "Mean endorsed items (0–5)"
  ) +
  theme_bw(base_size = 12)

ggsave("result/Appendix_List1_mean_diff.png", width = 5, height = 4, dpi = 600)

```

# Appendix Table: Descriptive statistics for the list experiment
```{r}
desc_tab <- dat %>%
  filter(!is.na(List_experiment_Hezbollah)) %>%
  group_by(List_experiment_Hezbollah) %>%
  summarise(
    N = n(),
    Mean = mean(Hezbollah_experiment, na.rm = TRUE),
    SD = sd(Hezbollah_experiment, na.rm = TRUE),
    Min = min(Hezbollah_experiment, na.rm = TRUE),
    Max = max(Hezbollah_experiment, na.rm = TRUE),
    Floor_0 = mean(Hezbollah_experiment == 0, na.rm = TRUE),
    Ceiling_5 = mean(Hezbollah_experiment == 5, na.rm = TRUE)
  )

print(desc_tab)

# Optional: export as HTML for appendix
library(gt)
desc_tab %>%
  mutate(across(where(is.numeric), ~round(.x, 3))) %>%
  gt() %>%
  tab_header(
    title = "Descriptive Statistics for the List Experiment"
  ) %>%
  gtsave("result/Appendix_List1_descriptive_table.html")

tab_fc <- list1_arm |>
  dplyr::group_by(List_experiment_Hezbollah) |>
  dplyr::summarise(
    n = dplyr::n(),
    floor_n = sum(y == 0),
    ceiling_n = sum(y == 5),
    floor_pct = round(100*floor_n/n,1),
    ceiling_pct = round(100*ceiling_n/n,1),
    mean_y = round(mean(y),3)
  )
print(tab_fc)
```